%%HTML
<script src="require.js"></script>
%matplotlib inline
import math
import numpy as np, pandas as pd
import matplotlib.pyplot as plt, seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
pio.renderers.default='notebook'
Importation des 3 fichiers CSV
df_games = pd.read_csv('data/games.csv', lineterminator='\n')
df_games.drop(columns=['summary'], inplace=True)
df_games['meta_count'] = (df_games['meta_pos'] + df_games['meta_mixed']+ df_games['meta_neg'])
df_games['user_count'] = (df_games['user_pos'] + df_games['user_mixed']+ df_games['user_neg'])
df_games['ratio'] = df_games['meta_count'] / df_games['user_count']
df_games['offset'] = df_games['meta_score'] - df_games['n_user_score']
On construit 4 colonnes de plus :
df_meta=pd.read_csv('data/meta_reviews.csv',lineterminator='\n')
df_users=pd.read_csv('data/user_reviews.csv',lineterminator='\n')
df_games.shape, df_meta.shape, df_users.shape
((31147, 22), (163577, 6), (176912, 6))
df_games.head()
| title | platform | developer | genre | rating | release_date | meta_score | meta_overview | meta_pos | meta_mixed | ... | user_overview | user_pos | user_mixed | user_neg | n_user_score | real_date | meta_count | user_count | ratio | offset | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 007 Legends | Wii U | Eurocom Entertainment Software, Eurocom | ['Action', 'Shooter', 'Shooter', 'First-Person... | T | Dec 11, 2012 | 0 | No score yet | 0 | 0 | ... | Mixed or average reviews | 10.0 | 7.0 | 13.0 | 53.0 | 2012-12-11 | 3 | 30.0 | 0.10000 | -53.0 |
| 1 | 1001 Spikes | Wii U | Nicalis | ['Action', 'General', 'Platformer', 'Platforme... | T | Jun 3, 2014 | 83 | Generally favorable reviews | 5 | 1 | ... | Mixed or average reviews | 14.0 | 3.0 | 6.0 | 70.0 | 2014-06-03 | 6 | 23.0 | 0.26087 | 13.0 |
| 2 | 140 | Wii U | Jeppe Carlsen | ['Action', 'Platformer', '2D'] | E | Sep 1, 2016 | 0 | No score yet | 0 | 3 | ... | No user score yet | 0.0 | 3.0 | 0.0 | 0.0 | 2016-09-01 | 3 | 3.0 | 1.00000 | 0.0 |
| 3 | 153 Hand Video Poker | Wii U | Skunk Software | ['Miscellaneous', 'Gambling'] | T | Jun 2, 2016 | 0 | No score yet | 0 | 0 | ... | No user score yet | 0.0 | 0.0 | 1.0 | 0.0 | 2016-06-02 | 1 | 1.0 | 1.00000 | 0.0 |
| 4 | 360 Breakout | Wii U | nuGAME | ['Action', 'Arcade'] | E | Aug 11, 2016 | 0 | No score yet | 0 | 1 | ... | No user score yet | 0.0 | 1.0 | 0.0 | 0.0 | 2016-08-11 | 1 | 1.0 | 1.00000 | 0.0 |
5 rows × 22 columns
On crée df_games2 qui ne contient que les jeux ayant à la fois une note globale utilisateur et une note globale de critique
df_games2 = df_games.loc[(df_games['meta_overview'] != 'No score yet') & (df_games['user_overview'] != 'No user score yet') & (df_games['user_count'] != 0)]
df_games2.head()
| title | platform | developer | genre | rating | release_date | meta_score | meta_overview | meta_pos | meta_mixed | ... | user_overview | user_pos | user_mixed | user_neg | n_user_score | real_date | meta_count | user_count | ratio | offset | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 1001 Spikes | Wii U | Nicalis | ['Action', 'General', 'Platformer', 'Platforme... | T | Jun 3, 2014 | 83 | Generally favorable reviews | 5 | 1 | ... | Mixed or average reviews | 14.0 | 3.0 | 6.0 | 70.0 | 2014-06-03 | 6 | 23.0 | 0.260870 | 13.0 |
| 5 | 3Souls | Wii U | Red Column | ['Action', 'Action Adventure', 'Platformer', '... | E10+ | Jul 28, 2016 | 49 | Generally unfavorable reviews | 0 | 3 | ... | Mixed or average reviews | 4.0 | 2.0 | 1.0 | 69.0 | 2016-07-28 | 4 | 7.0 | 0.571429 | -20.0 |
| 12 | A World of Keflings | Wii U | NinjaBee | ['Strategy', 'Breeding/Constructing', 'General... | E10+ | Nov 13, 2014 | 70 | Mixed or average reviews | 3 | 1 | ... | Generally favorable reviews | 5.0 | 1.0 | 1.0 | 76.0 | 2014-11-13 | 5 | 7.0 | 0.714286 | -6.0 |
| 14 | Abyss | Wii U | EnjoyUp Games | ['Action', 'General', 'Platformer', '2D'] | E | May 1, 2014 | 59 | Mixed or average reviews | 0 | 6 | ... | Mixed or average reviews | 5.0 | 5.0 | 2.0 | 65.0 | 2014-05-01 | 7 | 12.0 | 0.583333 | -6.0 |
| 18 | Adventure Time: Explore the Dungeon Because I ... | Wii U | WayForward | ['Action Adventure', 'Fantasy', 'General', 'Fa... | E10+ | Nov 19, 2013 | 40 | Generally unfavorable reviews | 0 | 2 | ... | Mixed or average reviews | 10.0 | 5.0 | 11.0 | 57.0 | 2013-11-19 | 7 | 26.0 | 0.269231 | -17.0 |
5 rows × 22 columns
px.histogram(df_games2,x='meta_score')
px.histogram(df_games2,x='offset')
On affiche la distribution des ratios "nombre de critiques / nombre de revues utilisatuer". On voit que la grande majorité des ratio est inférieure à 1, montrant qu'il y a plus de participation des utilisateurs. On pourrait parler du pic de la distribution (du mode) pour avoir une idée du ratio moyen.
px.histogram(df_games2,x='ratio',template="simple_white")
Ici un graphe qui montre la distribution des scores meta pour chaque console. Ça se ressemble. Petite tendance qui se dégage : les consoles les plus récentes (Series X et PS5) sont plus distribuées vers les notes hautes. (Pour information, la limite droite de chaque boite indique le 3ème quartile, c'est à dire que 25% des notes sont supérieures)
fig = px.box(df_games2,x='meta_score',color='platform',hover_name='title',
title="Distribution des notes Meta par plateforme",
category_orders={
"platform": ["3DS", "Wii U", "Switch", "Playstation 4", "Playstation 5","Xbox One","Xbox Series X"],
},
template="simple_white")
fig.show()
df =pd.DataFrame(dict(
series=np.concatenate((["Meta Score"]*len(df_games2['meta_score']), ["User Score"]*len(df_games2['n_user_score']))),
data =np.concatenate((df_games2['meta_score'],df_games2['n_user_score']))
))
px.histogram(df, x="data", color="series", barmode="overlay",template="simple_white")
fig=px.scatter(df_games2,x='meta_score',y='n_user_score',
color='platform', hover_data=['title'],size='meta_count',trendline='ols',template="simple_white")
fig.add_shape(type="line",
x0=0, y0=0, x1=100, y1=100,
line=dict(color="RoyalBlue",width=3)
)
fig.update_layout(width=1000, height=800)
fig.show()
df_meta.head().transpose()
| 0 | 1 | 2 | 3 | 4 | |
|---|---|---|---|---|---|
| score | 42.0 | 40.0 | 40.0 | 95.0 | 90.0 |
| critic | IGN | Official Nintendo Magazine UK | Nintendo Life | Destructoid | FNintendo |
| date | Jan 14, 2013 | Jan 27, 2013 | Dec 17, 2012 | Jun 8, 2014 | Nov 5, 2015 |
| title | 007 Legends | 007 Legends | 007 Legends | 1001 Spikes | 1001 Spikes |
| platform | Wii U | Wii U | Wii U | Wii U | Wii U |
| real_date | 2013-01-14 | 2013-01-27 | 2012-12-17 | 2014-06-08 | 2015-11-05 |
# si j'utilisais pyplot
plt.rc('xtick', labelsize=14)
plt.rc('ytick', labelsize=14)
sns.set_style(style='whitegrid')
Which console has more games titles ?
df_games.groupby('platform')['title'].count().sort_values(ascending=False).plot.bar(figsize=(10, 6), rot=0);
Je regarde combien de critiques sont faites par chaque organe identifié dans Metacritique. On comprend ici que TheXboxHub a produit 3923 critiques, Nintendo Life 3864...
fig = px.histogram(df_games2, x = 'meta_score')
fig.show()
df_meta.groupby('critic')['score'].count()
#.sort_values(ascending=False)[:1000]
#.plot.bar(figsize=(10, 6), rot=0);
critic
1UP 20
3DJuegos 1151
4Players.de 1770
ActionTrip 99
Adventure Gamers 26
...
XboxEra 164
Yahoo! 59
ZTGD 1702
games(TM) 654
iMore 54
Name: score, Length: 327, dtype: int64
df = df_meta[df_meta['critic'] == 'TheXboxHub']
#df_meta.groupby('critic')['score'].count().sort_values(ascending=False)[:25]
#.plot.bar(figsize=(10, 6), rot=0);
fig = px.histogram(df_meta, x="score",marginal='box')
fig.show()
Given release date let's add two columns with year and month information
df_meta[df_meta['critic'] == 'TheXboxHub']
| score | critic | date | title | platform | real_date | |
|---|---|---|---|---|---|---|
| 66609 | 70.0 | TheXboxHub | Mar 10, 2021 | 3 out of 10: Season One | Xbox Series X | 2021-03-10 |
| 66612 | 30.0 | TheXboxHub | Oct 31, 2022 | 41 Hours | Xbox Series X | 2022-10-31 |
| 66613 | 80.0 | TheXboxHub | Dec 3, 2021 | 6Souls | Xbox Series X | 2021-12-03 |
| 66614 | 40.0 | TheXboxHub | Nov 16, 2022 | 7 Days of Rose | Xbox Series X | 2022-11-16 |
| 66616 | 60.0 | TheXboxHub | Oct 3, 2022 | 9 Clues 2: The Ward | Xbox Series X | 2022-10-03 |
| ... | ... | ... | ... | ... | ... | ... |
| 163513 | 60.0 | TheXboxHub | Nov 14, 2019 | Zombieland: Double Tap - Road Trip | Xbox One | 2019-11-14 |
| 163516 | 80.0 | TheXboxHub | Jul 14, 2021 | Zombies Ate My Neighbors and Ghoul Patrol | Xbox One | 2021-07-14 |
| 163518 | 60.0 | TheXboxHub | May 10, 2020 | Zombies ruined my day | Xbox One | 2020-05-10 |
| 163519 | 70.0 | TheXboxHub | Jun 20, 2022 | Zombo Buster Advance | Xbox One | 2022-06-20 |
| 163520 | 50.0 | TheXboxHub | Dec 1, 2021 | Zombo Buster Rising | Xbox One | 2021-12-01 |
3923 rows × 6 columns
df['release_date'] = pd.to_datetime(df['release_date'])
df['month'] = df['release_date'].dt.month
df['year'] = df['release_date'].dt.year
EA Sports appears also as 'EA Sports, EA Vancouver' and 'EA Vancouver'. Let's unify the names
df.loc[df['developer'] == 'EA Sports, EA Vancouver', 'developer'] = 'EA Sports'
df.loc[df['developer'] == 'EA Vancouver', 'developer'] = 'EA Sports'
In order to perform analysis related to scores, we need to use those games with reviews
tmp = df.loc[(df['meta_overview'] != 'No score yet') & (df['user_overview'] != 'No user score yet')].copy()
Which console has best average scores ?
tmp.groupby('platform')['meta_score', 'n_user_score'].mean().sort_values('meta_score', ascending=False).plot.bar(figsize=(10, 6), rot=0);
/var/folders/2h/pylp0hzn5td4mz63q7r0cz0h0000gn/T/ipykernel_30894/3120562876.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.
tmp.groupby('platform')['meta_score', 'n_user_score'].mean().sort_values('meta_score', ascending=False).plot.bar(figsize=(10, 6), rot=0);
Let's compare metacritics scores vs user scores. (metacritics scores are made by specialized critics sites while user scores by people registered on metacritic.com)
tmp[['meta_score']].plot.hist(bins=90, figsize=(12, 6));
tmp[['n_user_score']].plot.hist(bins=25, figsize=(12, 6));
Meta scores tend to be higher than user scores
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot([0, 100], [0, 100])
tmp.plot.scatter(x='n_user_score', y='meta_score', ax=ax, alpha=0.25);
mo = tmp.groupby('meta_overview')['title'].count()
mo.name = 'meta_overview'
uo = tmp.groupby('user_overview')['title'].count()
uo.name = 'user_overview'
pd.concat([mo, uo], axis=1).sort_values('meta_overview', ascending=False).plot.bar(figsize=(12, 6));
We confirm that there are more 'Generally favorable reviews' in meta scores and more 'Generally unfavorable reviews' in user scores.
Users tend to be more critic in their reviews than specialized sites (paid reviews ?)
How many new titles were released since 2014 ?
Here we compare just PlayStation 4 and Xbox One because both platforms were announced the same year (2013). On the other hand Nintendo Switch in 2016
# df.loc[(df['year'] > 2013) & (df['year'] < 2023) & (df['platform'] != 'Switch')].groupby(['year', 'platform'])['title'].count().unstack().plot.bar(figsize=(10, 6));
df.loc[(df['year'] > 2011) & (df['year'] < 2023)].groupby(['year', 'platform'])['title'].count().unstack().plot.line(figsize=(15, 6));
When are new titles released ?
df.groupby(['year', 'month'])['title'].count().groupby('month').mean().plot(figsize=(12, 6));
There's a peak between August and November (anticipating christmas sales?)
How did average scores evolve since 2014 ?
fig = plt.figure(figsize = (15,6))
ax = fig.add_subplot(1, 1, 1)
tmp.loc[(tmp['year'] > 2012) & (tmp['year'] < 2023), ('meta_score', 'year')].groupby('year').mean().plot(ax = ax, xticks=np.arange(2012, 2022, 1))
tmp.loc[(tmp['year'] > 2012) & (tmp['year'] < 2023), ('n_user_score', 'year')].groupby('year').mean().plot(ax = ax, xticks=np.arange(2012, 2022, 1));
No big changes
How many titles per console have metacritic score greater than 85?
tmp.loc[tmp['meta_score'] >= 85].groupby('platform')['title'].count().sort_values(ascending=False)
platform PlayStation 4 188 Switch 172 Xbox One 158 PlayStation 5 52 Xbox Series X 52 3DS 40 Wii U 31 Name: title, dtype: int64
How many titles per console have user score greater than 85?
tmp.loc[tmp['n_user_score'] >= 85].groupby('platform')['title'].count().sort_values(ascending=False)
platform PlayStation 4 86 Switch 73 Xbox One 47 Xbox Series X 33 Wii U 23 3DS 20 PlayStation 5 16 Name: title, dtype: int64
Let's add a column with the difference between meta and user score
tmp['dif'] = tmp['meta_score'] - tmp['n_user_score']
ps4 = tmp.loc[tmp['platform'] == 'PlayStation 4']
xone = tmp.loc[tmp['platform'] == 'Xbox One']
switch = tmp.loc[tmp['platform'] == 'Switch']
What are the best 20 titles according to metacritic score?
ps4.sort_values('meta_score', ascending=False).reset_index(drop=True)[['title', 'meta_score', 'n_user_score', 'dif']][:20]
| title | meta_score | n_user_score | dif | |
|---|---|---|---|---|
| 0 | Grand Theft Auto V | 97 | 83.0 | 14.0 |
| 1 | Red Dead Redemption 2 | 97 | 87.0 | 10.0 |
| 2 | Persona 5 Royal | 95 | 85.0 | 10.0 |
| 3 | The Last of Us Remastered | 95 | 92.0 | 3.0 |
| 4 | God of War | 94 | 91.0 | 3.0 |
| 5 | The Last of Us Part II | 93 | 58.0 | 35.0 |
| 6 | Metal Gear Solid V: The Phantom Pain | 93 | 82.0 | 11.0 |
| 7 | Persona 5 | 93 | 87.0 | 6.0 |
| 8 | XCOM 2: War of the Chosen | 93 | 69.0 | 24.0 |
| 9 | Uncharted 4: A Thief's End | 93 | 88.0 | 5.0 |
| 10 | Journey | 92 | 83.0 | 9.0 |
| 11 | Divinity: Original Sin II - Definitive Edition | 92 | 80.0 | 12.0 |
| 12 | Undertale | 92 | 73.0 | 19.0 |
| 13 | Bloodborne | 92 | 89.0 | 3.0 |
| 14 | The Witcher 3: Wild Hunt | 92 | 92.0 | 0.0 |
| 15 | The Witcher 3: Wild Hunt - Blood and Wine | 91 | 92.0 | -1.0 |
| 16 | Dragon Quest XI S: Echoes of an Elusive Age - ... | 91 | 80.0 | 11.0 |
| 17 | Resident Evil 2 | 91 | 90.0 | 1.0 |
| 18 | Shadow of the Colossus | 91 | 78.0 | 13.0 |
| 19 | Final Fantasy XIV: Shadowbringers | 91 | 87.0 | 4.0 |
For a lot of those titles there's a big difference between what meta critics and users think
What are the best 20 titles according to user score?
ps4.sort_values('n_user_score', ascending=False).reset_index(drop=True)[['title', 'n_user_score', 'meta_score', 'dif']][:20]
| title | n_user_score | meta_score | dif | |
|---|---|---|---|---|
| 0 | Wanderer | 98.0 | 72 | -26.0 |
| 1 | Dynasty Warriors 9 Empires | 96.0 | 58 | -38.0 |
| 2 | Crystar | 96.0 | 67 | -29.0 |
| 3 | Bayonetta & Vanquish | 93.0 | 85 | -8.0 |
| 4 | Star Renegades | 93.0 | 78 | -15.0 |
| 5 | Fatal Frame: Maiden of Black Water | 93.0 | 71 | -22.0 |
| 6 | The Witcher 3: Wild Hunt | 92.0 | 92 | 0.0 |
| 7 | The Witcher 3: Wild Hunt - Blood and Wine | 92.0 | 91 | -1.0 |
| 8 | Infinifactory | 92.0 | 70 | -22.0 |
| 9 | The Evil Within: The Assignment | 92.0 | 80 | -12.0 |
| 10 | The Last of Us Remastered | 92.0 | 95 | 3.0 |
| 11 | God of War | 91.0 | 94 | 3.0 |
| 12 | Ghost of Tsushima | 91.0 | 83 | -8.0 |
| 13 | The Evil Within: The Consequence | 91.0 | 75 | -16.0 |
| 14 | Vanquish | 91.0 | 77 | -14.0 |
| 15 | Dark Souls III | 90.0 | 89 | -1.0 |
| 16 | Astro Bot: Rescue Mission | 90.0 | 90 | 0.0 |
| 17 | Atelier Sophie 2: The Alchemist of the Mysteri... | 90.0 | 82 | -8.0 |
| 18 | Rise of the Third Power | 90.0 | 79 | -11.0 |
| 19 | The Witcher 3: Wild Hunt - Hearts of Stone | 90.0 | 90 | 0.0 |
With exception of AO Tennis, meta critics and users scores seem to be similar
What are the top 10 titles most overrated by metacritic? (user point of view)
ps4.sort_values('dif', ascending=False)[:20][['title', 'meta_score', 'n_user_score', 'dif']].reset_index(drop=True)
| title | meta_score | n_user_score | dif | |
|---|---|---|---|---|
| 0 | ZEN Pinball 2: Ant-Man Pinball | 81 | 12.0 | 69.0 |
| 1 | FIFA 20 | 79 | 13.0 | 66.0 |
| 2 | NBA 2K20 | 78 | 14.0 | 64.0 |
| 3 | FIFA 21 | 72 | 9.0 | 63.0 |
| 4 | Tom Clancy's The Division 2: Warlords of New York | 74 | 11.0 | 63.0 |
| 5 | Madden NFL 20 | 76 | 14.0 | 62.0 |
| 6 | NBA 2K18 | 80 | 18.0 | 62.0 |
| 7 | Tales of Symphonia Remastered | 70 | 8.0 | 62.0 |
| 8 | Megaquarium | 78 | 16.0 | 62.0 |
| 9 | FIFA 19 | 83 | 22.0 | 61.0 |
| 10 | Madden NFL 21 | 63 | 3.0 | 60.0 |
| 11 | EA SPORTS UFC 4 | 78 | 18.0 | 60.0 |
| 12 | Madden NFL 19 | 80 | 20.0 | 60.0 |
| 13 | Call of Duty: Infinite Warfare - Sabotage | 76 | 17.0 | 59.0 |
| 14 | NBA 2K21 | 68 | 9.0 | 59.0 |
| 15 | Gran Turismo 7 | 82 | 26.0 | 56.0 |
| 16 | Call of Duty: WWII - The War Machine | 73 | 17.0 | 56.0 |
| 17 | NBA 2K19 | 82 | 28.0 | 54.0 |
| 18 | Call of Duty: Black Ops III - Salvation | 74 | 21.0 | 53.0 |
| 19 | Call of Duty: Infinite Warfare - Continuum | 75 | 22.0 | 53.0 |
What are the top 10 titles most underrated by metacritic? (user point of view)
ps4.sort_values('dif')[:10][['title', 'meta_score', 'n_user_score', 'dif']].reset_index(drop=True)
| title | meta_score | n_user_score | dif | |
|---|---|---|---|---|
| 0 | Left Alive | 37 | 82.0 | -45.0 |
| 1 | Dynasty Warriors 9 Empires | 58 | 96.0 | -38.0 |
| 2 | Dead or Alive Xtreme 3: Fortune | 43 | 75.0 | -32.0 |
| 3 | Crystal Rift | 37 | 66.0 | -29.0 |
| 4 | Crystar | 67 | 96.0 | -29.0 |
| 5 | AO Tennis | 44 | 73.0 | -29.0 |
| 6 | Big Bash Boom | 55 | 83.0 | -28.0 |
| 7 | Terminator: Resistance | 47 | 75.0 | -28.0 |
| 8 | Energy Hook | 22 | 49.0 | -27.0 |
| 9 | Spuds Unearthed | 43 | 70.0 | -27.0 |
Let's see more information about AO Tennis
ps4.loc[ps4['title'] == 'AO Tennis'].transpose()
| 1242 | |
|---|---|
| title | AO Tennis |
| platform | PlayStation 4 |
| developer | Big Ant Studios |
| genre | ['Sports', 'Individual', 'Tennis'] |
| rating | NaN |
| release_date | 2018-01-16 00:00:00 |
| meta_score | 44 |
| meta_overview | Generally unfavorable reviews |
| meta_pos | 0 |
| meta_mixed | 2 |
| meta_neg | 2 |
| user_score | 7.3 |
| user_overview | Mixed or average reviews |
| user_pos | 28.0 |
| user_mixed | 1.0 |
| user_neg | 11.0 |
| n_user_score | 73.0 |
| real_date | 2018-01-16 |
| month | 1 |
| year | 2018 |
| dif | -29.0 |
It's a new game with few critics reviews
ps4_list = set(df.loc[df['platform'] == 'PlayStation 4', 'title'])
xone_list = set(df.loc[df['platform'] == 'Xbox One', 'title'])
switch_list = set(df.loc[df['platform'] == 'Switch', 'title'])
How many exclusive games PlayStation 4 got?
ps4_exclusives = tmp.loc[tmp['title'].isin(list(ps4_list.difference(xone_list).difference(switch_list))), ('title', 'n_user_score', 'meta_score')].sort_values('n_user_score', ascending=False).reset_index(drop=True)
len(ps4_exclusives)
700
What are the top 25 exclusives PlayStation 4 games?
ps4_exclusives[:25]
| title | n_user_score | meta_score | |
|---|---|---|---|
| 0 | Wanderer | 98.0 | 72 |
| 1 | Infinifactory | 92.0 | 70 |
| 2 | The Last of Us Remastered | 92.0 | 95 |
| 3 | Ghost of Tsushima | 91.0 | 83 |
| 4 | God of War | 91.0 | 94 |
| 5 | Astro Bot: Rescue Mission | 90.0 | 90 |
| 6 | NieR: Automata | 89.0 | 88 |
| 7 | Bloodborne: The Old Hunters | 89.0 | 87 |
| 8 | Bloodborne | 89.0 | 92 |
| 9 | NieR: Automata - Game of the YoRHa Edition | 89.0 | 91 |
| 10 | Detroit: Become Human | 88.0 | 78 |
| 11 | Uncharted 4: A Thief's End | 88.0 | 93 |
| 12 | Utawarerumono: Mask of Truth | 88.0 | 75 |
| 13 | Final Fantasy XIV: Stormblood | 88.0 | 89 |
| 14 | Final Fantasy XIV: Endwalker | 87.0 | 90 |
| 15 | Marvel's Spider-Man | 87.0 | 87 |
| 16 | Persona 5 | 87.0 | 93 |
| 17 | Dreams | 87.0 | 89 |
| 18 | Final Fantasy XIV: Shadowbringers | 87.0 | 91 |
| 19 | Monster of the Deep: Final Fantasy XV | 87.0 | 62 |
| 20 | Uncharted: The Nathan Drake Collection | 86.0 | 86 |
| 21 | The Legend of Heroes: Trails of Cold Steel | 86.0 | 80 |
| 22 | Sackboy: A Big Adventure | 86.0 | 79 |
| 23 | Dragon Quest XI: Echoes of an Elusive Age | 86.0 | 86 |
| 24 | Heavy Rain & Beyond: Two Souls Collection | 85.0 | 78 |
How many exlusive PlayStation 4 games have user score equal or greater than 80 ?
len(ps4_exclusives.loc[ps4_exclusives['n_user_score'] >= 80])
87
How many exclusive games Xbox One got?
xone_exclusives = tmp.loc[tmp['title'].isin(list(xone_list.difference(ps4_list).difference(switch))), ('title', 'n_user_score', 'meta_score')].sort_values('n_user_score', ascending=False).reset_index(drop=True)
len(xone_exclusives)
325
What are the top 25 exclusives Xbox One games?
xone_exclusives[:25]
| title | n_user_score | meta_score | |
|---|---|---|---|
| 0 | NieR: Automata - Become as Gods Edition | 91.0 | 90 |
| 1 | Ori and the Will of the Wisps | 89.0 | 90 |
| 2 | Syberia: The World Before | 88.0 | 82 |
| 3 | Ori and the Will of the Wisps | 87.0 | 92 |
| 4 | Ori and the Will of the Wisps | 87.0 | 93 |
| 5 | Ori and the Blind Forest | 86.0 | 88 |
| 6 | Age of Empires II: Definitive Edition | 86.0 | 86 |
| 7 | Katana ZERO | 85.0 | 83 |
| 8 | Ori and the Blind Forest: Definitive Edition | 85.0 | 90 |
| 9 | Floppy Knights | 85.0 | 69 |
| 10 | Vampire Survivors | 85.0 | 95 |
| 11 | Octopath Traveler | 84.0 | 83 |
| 12 | Enter The Gungeon | 83.0 | 85 |
| 13 | Omensight: Definitive Edition | 83.0 | 81 |
| 14 | Syberia: The World Before | 83.0 | 77 |
| 15 | Tunic | 83.0 | 88 |
| 16 | Dying Light: Platinum Edition | 83.0 | 78 |
| 17 | Forza Horizon 4 | 83.0 | 92 |
| 18 | Forza Horizon 2 | 83.0 | 86 |
| 19 | Tunche | 82.0 | 71 |
| 20 | MouseCraft | 82.0 | 77 |
| 21 | The Binding of Isaac: Afterbirth + | 82.0 | 85 |
| 22 | Turok 2: Seeds of Evil Remaster | 82.0 | 78 |
| 23 | The Last Oricru | 82.0 | 57 |
| 24 | Forza Horizon 5 | 82.0 | 92 |
How many exlusive Xbox One games have user score equal or greater than 80 ?
len(xone_exclusives.loc[xone_exclusives['n_user_score'] >= 80])
44
How many exclusive games Switch got?
switch_exclusives = tmp.loc[tmp['title'].isin(list(switch_list.difference(ps4_list).difference(xone_list))), ('title', 'n_user_score', 'meta_score')].sort_values('n_user_score', ascending=False).reset_index(drop=True)
len(switch_exclusives)
666
What are the top 25 exclusives Switch games?
switch_exclusives[:25]
| title | n_user_score | meta_score | |
|---|---|---|---|
| 0 | Ultra Kaiju Monster Rancher | 95.0 | 75 |
| 1 | Touken Ranbu Warriors | 92.0 | 68 |
| 2 | Monster Rancher 1 & 2 DX | 90.0 | 70 |
| 3 | Bayonetta 2 | 89.0 | 91 |
| 4 | Fire Emblem Warriors: Three Hopes | 89.0 | 81 |
| 5 | Donkey Kong Country: Tropical Freeze | 89.0 | 83 |
| 6 | Astral Chain | 89.0 | 87 |
| 7 | Super Mario Odyssey | 89.0 | 97 |
| 8 | Kirby and the Forgotten Land | 88.0 | 85 |
| 9 | Xenoblade Chronicles: Definitive Edition | 88.0 | 89 |
| 10 | Fire Emblem: Three Houses | 88.0 | 89 |
| 11 | Metroid Prime Remastered | 88.0 | 94 |
| 12 | Picross S 4 | 87.0 | 77 |
| 13 | Bayonetta + Bayonetta 2 | 87.0 | 90 |
| 14 | Super Mario 3D World + Bowser's Fury | 87.0 | 89 |
| 15 | The Legend of Zelda: Breath of the Wild | 87.0 | 97 |
| 16 | Captain Toad: Treasure Tracker | 86.0 | 81 |
| 17 | Ring Fit Adventure | 86.0 | 83 |
| 18 | The House in Fata Morgana - Dreams of the Reve... | 86.0 | 96 |
| 19 | Donkey Kong Country: Tropical Freeze | 86.0 | 86 |
| 20 | Pikmin 3 Deluxe | 86.0 | 85 |
| 21 | Mario Kart 8 Deluxe | 86.0 | 92 |
| 22 | Bayonetta 2 | 86.0 | 92 |
| 23 | Metroid Dread | 86.0 | 88 |
| 24 | Monster Train First Class | 85.0 | 85 |
How many exlusive Switch games have user score equal or greater than 80 ?
len(switch_exclusives.loc[switch_exclusives['n_user_score'] >= 80])
146
Who are the top 10 developers with more released titles?
ps4.groupby('developer')['title'].count().sort_values(ascending=False)[:10]
developer Telltale Games 53 Capcom 41 Square Enix 31 Omega Force 23 Ubisoft Montreal 21 EA Sports 20 Zen Studios 20 Bandai Namco Games 18 Milestone S.r.l 16 Nippon Ichi Software 16 Name: title, dtype: int64
Does quantity mean quality?
dev = pd.concat([ps4.groupby('developer')['title'].count(), ps4.groupby('developer')['n_user_score'].mean(), ps4.groupby('developer')['meta_score'].mean()], axis=1)
dev.columns = ['count', 'n_user_score_avg', 'meta_score_avg']
dev.sort_values('count', ascending=False)[:10]
| count | n_user_score_avg | meta_score_avg | |
|---|---|---|---|
| developer | |||
| Telltale Games | 53 | 65.811321 | 73.150943 |
| Capcom | 41 | 70.121951 | 76.195122 |
| Square Enix | 31 | 76.580645 | 76.290323 |
| Omega Force | 23 | 74.000000 | 71.826087 |
| Ubisoft Montreal | 21 | 66.428571 | 71.761905 |
| EA Sports | 20 | 45.800000 | 73.550000 |
| Zen Studios | 20 | 54.200000 | 78.350000 |
| Bandai Namco Games | 18 | 58.611111 | 68.666667 |
| Milestone S.r.l | 16 | 60.312500 | 69.625000 |
| Nippon Ichi Software | 16 | 66.000000 | 70.625000 |
It seems that not
Who are the best 10 developers according to user score (at least 4 released titles)?
dev.loc[dev['count'] >= 4].sort_values('n_user_score_avg', ascending=False)[:10]
| count | n_user_score_avg | meta_score_avg | |
|---|---|---|---|
| developer | |||
| CD Projekt Red Studio | 4 | 86.750000 | 88.250000 |
| Tango Gameworks | 5 | 83.000000 | 73.000000 |
| From Software | 9 | 83.000000 | 83.333333 |
| Falcom | 10 | 81.200000 | 79.300000 |
| Arkane Studios | 5 | 81.000000 | 82.400000 |
| Ryu ga Gotoku Studios | 10 | 79.800000 | 80.400000 |
| Naughty Dog | 4 | 79.500000 | 91.250000 |
| id Software | 6 | 77.833333 | 78.666667 |
| Grasshopper Manufacture | 4 | 77.250000 | 66.750000 |
| PlatinumGames | 10 | 76.600000 | 75.200000 |
What are the titles that gave them such a good reputation ?
From Software
ps4.loc[ps4['developer'] == 'From Software', 'title'].reset_index(drop=True)
0 Bloodborne 1 Bloodborne: The Old Hunters 2 Dark Souls II: Scholar of the First Sin 3 Dark Souls III 4 Dark Souls III: Ashes of Ariandel 5 Dark Souls III: The Ringed City 6 Dark Souls Remastered 7 Deracine 8 Sekiro: Shadows Die Twice Name: title, dtype: object
DONTNOD Entertainment
ps4.loc[ps4['developer'] == 'DONTNOD Entertainment', 'title'].reset_index(drop=True)
0 Life is Strange 1 Life is Strange 2 2 Life is Strange 2: Episode 1 - Roads 3 Life is Strange 2: Episode 2 - Rules 4 Life is Strange 2: Episode 3 - Wastelands 5 Life is Strange 2: Episode 4 - Faith 6 Life is Strange 2: Episode 5 - Wolves 7 Life is Strange: Episode 1 - Chrysalis 8 Life is Strange: Episode 2 - Out of Time 9 Life is Strange: Episode 3 - Chaos Theory 10 Life is Strange: Episode 4 - Dark Room 11 Life is Strange: Episode 5 - Polarized 12 The Awesome Adventures of Captain Spirit 13 Vampyr Name: title, dtype: object
Who are the top 10 developers whose titles are on average more overrated by metacritic (at least 4 released titles) ?
dev = pd.concat([ps4.groupby('developer')['dif'].mean().sort_values(ascending=False), ps4.groupby('developer')['title'].count()], axis=1)
dev.columns = ['dif_avg', 'count']
dev.loc[dev['count'] >= 4].sort_values('dif_avg', ascending=False)[:10]
| dif_avg | count | |
|---|---|---|
| developer | ||
| Electronic Arts | 55.142857 | 7 |
| Sledgehammer Games | 39.800000 | 5 |
| Visual Concepts | 38.500000 | 10 |
| Treyarch | 33.714286 | 7 |
| Infinity Ward | 33.555556 | 9 |
| Bungie | 30.333333 | 9 |
| EA Sports | 27.750000 | 20 |
| Massive Entertainment | 25.250000 | 4 |
| Zen Studios | 24.150000 | 20 |
| EA DICE | 23.375000 | 8 |
What are the top 10 EA Sports games according to metacritic?
ea = ps4.loc[ps4['developer'] == 'EA Sports', ['title', 'meta_score', 'n_user_score']].sort_values('meta_score', ascending=False)[:10]
ea.set_index('title').plot.bar(figsize=(14, 6));
Who are the top 10 developers whose titles are on average more underrated by metacritic (at least 4 released titles)?
dev.loc[dev['count'] >= 4].sort_values('dif_avg')[:10]
| dif_avg | count | |
|---|---|---|
| developer | ||
| Grasshopper Manufacture | -10.500000 | 4 |
| Tango Gameworks | -10.000000 | 5 |
| Big Ant Studios | -8.500000 | 8 |
| Dimps Corporation | -7.800000 | 5 |
| CyberConnect2 | -4.571429 | 7 |
| Koei Tecmo Games | -4.538462 | 13 |
| Acquire | -3.833333 | 6 |
| Omega Force | -2.173913 | 23 |
| Cyanide, Cyanide Studios | -2.000000 | 4 |
| Falcom | -1.900000 | 10 |
ps4.loc[ps4['developer'] == 'Grasshopper Manufacture', 'title'].reset_index(drop=True)
0 Let It Die 1 The 25th Ward: The Silver Case 2 The Silver Case 3 Travis Strikes Again: No More Heroes - Complet... Name: title, dtype: object
ps4.loc[ps4['developer'] == 'Acquire', 'title'].reset_index(drop=True)
0 Aegis of Earth: Protonovus Assault 1 Akiba's Beat 2 Akiba's Trip: Hellbound & Debriefed 3 Akiba's Trip: Undead & Undressed 4 Katana Kami: A Way of the Samurai Story 5 No Heroes Allowed! VR Name: title, dtype: object
How many titles per rating category?
df['rating'].value_counts().plot.bar(figsize=(12, 6), rot=0);
E: Everyone
E+10: Everyone +10
T: Teen +13
M: Mature +17
Which console is more "family" oriented?
qty = df.loc[(df['rating'].isnull() == False) & (df['rating'] != 'RP')].groupby('platform')['title'].count()
#((df.loc[df['rating'] != 'RP'].groupby(['platform', 'rating'])['title'].count().unstack().transpose() / qty) * 100).plot.bar(figsize=(12, 6), rot=0);
rat = (df.loc[df['rating'] != 'RP'].groupby(['platform', 'rating'])['title'].count().unstack().transpose() / qty) * 100
rat = rat[['PlayStation 4', 'Xbox One', 'Switch']]
fig = plt.figure(figsize = (10,6))
ax = fig.add_subplot(1, 1, 1)
rat.iloc[0].plot.bar(ax=ax, color='beige', edgecolor='white', label=rat.index[0]);
rat.iloc[1].plot.bar(ax=ax, bottom=rat.iloc[0], color='navajowhite', edgecolor='white', label=rat.index[1]);
rat.iloc[3].plot.bar(ax=ax, bottom=rat.iloc[:2].sum().values, color='lightsalmon', edgecolor='white', label=rat.index[3]);
rat.iloc[2].plot.bar(ax=ax, bottom=(rat.iloc[:2].sum() + rat.iloc[3]).values, color='firebrick', edgecolor='white', label=rat.index[2], rot=0);
ax.legend(loc='upper left', bbox_to_anchor=(1, 1), ncol=1);
Switch has in proportion more games suited for everyone
Which category has the best games ?
tmp.loc[(tmp['rating'].isnull() == False) & (tmp['rating'] != 'RP')].groupby('rating')['n_user_score'].mean().sort_values(ascending=False).plot.bar(figsize=(12, 6), rot=0);
Mature category seems to has an advantage